library(tidyverse)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
## Registered S3 method overwritten by 'rvest':
## method from
## read_xml.response xml2
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Locations of the benchmark logs analysed by this script.
RESEARCH_HOME <- "/home/and/Documents/PhD/Research/"
FILES_PATH <- "Scripts/R/Benchmarks/MultiAndSingleNode/R12/dblab/"
MONITOR_FILE <- "monitor.log"
NOHUP_FILE <- "nohup.out"
# Delimiter used to split the raw ID field; only the third piece is kept.
SEPARATOR_ID <- "-"

# Read the nohup log and keep only the lines that contain a "|" immediately
# followed by a digit 1-6 and a dot.
lines <- readLines(paste0(RESEARCH_HOME, FILES_PATH, NOHUP_FILE))
lines <- lines[grepl("\\|[1-6]\\.", lines)]

# Split every kept line on "|" into its seven fields, reduce the ID to its
# third SEPARATOR_ID-delimited piece and convert the numeric fields.
# The tibble is built with an explicit `value` column because calling
# as_tibble() on a bare vector is discouraged by tibble.
nohup <- tibble(value = lines) %>%
  separate(
    value,
    into = c("Timestamp", "ID", "Time", "Stage", "Duration", "Load", "TS"),
    sep = "\\|"
  ) %>%
  separate(ID, into = c(NA, NA, "ID"), sep = SEPARATOR_ID) %>%
  mutate(
    ID = as.numeric(ID),
    Time = as.numeric(Time),
    Duration = as.numeric(Duration)
  )
## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.
# One row per logged stage: the stage label is prefixed with its TS value and
# the start of the stage is derived as (Time - Duration).
nohupStages <- nohup %>%
  mutate(
    Stage = paste0(TS, ".", str_trim(Stage)),
    Start = Time - Duration,
    End = Time
  ) %>%
  select(ID, Stage, Start, End, Duration)

# One row per (ID, TS): the interval spanning the earliest stage start and
# the latest stage end. summarise() only drops the last grouping level, so
# ungroup() explicitly to hand a plain tibble to later joins.
nohupTimeintervals <- nohup %>%
  select(ID, TS, Time, Duration) %>%
  mutate(Start = Time - Duration, End = Time) %>%
  group_by(ID, TS) %>%
  summarise(Start = min(Start), End = max(End)) %>%
  ungroup()
# Row-wise predicate: is `Time` strictly inside the open interval
# (Start, End)?
#
# ID, Stage and TS are not used in the test; they are accepted so that a
# whole row can be passed by name (e.g. through purrr::pmap_dfr()).
#
# Returns a one-row tibble with a single logical column `value`. A missing
# Start or End (for example after an unmatched left join) yields FALSE rather
# than NA, so the column is always a strict TRUE/FALSE.
checkF <- function(ID, Stage, TS, Time, Start, End) {
  inside <- isTRUE(Start < Time && Time < End)
  # Built explicitly and returned visibly; as_tibble() on a bare vector is
  # discouraged by tibble.
  tibble::tibble(value = inside)
}
# Find, for every stage and node count, the first sample that falls inside
# one of the logged time intervals.
#
# d         Data frame with at least the columns ID, Time, y, Nodes and Stage.
# intervals Data frame with the columns ID, TS, Start and End; defaults to
#           the script-level `nohupTimeintervals`.
#
# Each row of `d` is matched to every interval of the same ID and kept only
# when Start < Time < End. The stage label is then prefixed with the
# interval's TS, and per (Stage, Nodes) the earliest Time (returned as
# `Start`) and the smallest y are reported, ordered by `Start`.
#
# The interval test is a vectorised filter(), so an empty `d` simply gives an
# empty result; rows without a matching interval have NA bounds and are
# dropped by filter().
getStageStart <- function(d, intervals = nohupTimeintervals) {
  d %>%
    left_join(intervals, by = "ID") %>%
    filter(Start < Time, Time < End) %>%
    mutate(Stage = paste0(TS, ".", Stage)) %>%
    select(Stage, Nodes, Time, y) %>%
    group_by(Stage, Nodes) %>%
    summarise(Start = min(Time), y = min(y)) %>%
    ungroup() %>%
    arrange(Start)
}
# Read the monitor log and keep only the lines containing "|SCALE|".
lines <- readLines(paste0(RESEARCH_HOME, FILES_PATH, MONITOR_FILE))
lines <- lines[grepl("\\|SCALE\\|", lines)]

# Split every kept line on "|" into its eleven fields, reduce the ID to its
# third SEPARATOR_ID-delimited piece, convert the numeric fields and average
# RDDs, Tasks and Load over rows sharing (ID, Time, Nodes, StageID, Stage).
# Rows with StageID == -1 are discarded.
# The result is ungrouped before Nodes is replaced: Nodes is one of the
# grouping columns, and overwriting it on a grouped tibble would leave stale
# grouping metadata behind.
monitor <- tibble(value = lines) %>%
  separate(
    value,
    into = c(
      "Timestamp", "Scale", "Time", "ID", "Nodes", "StageID",
      "Stage", "RDDs", "Task", "Dura", "Load"
    ),
    sep = "\\|"
  ) %>%
  separate(ID, into = c(NA, NA, "ID"), sep = SEPARATOR_ID) %>%
  select(ID, Time, Nodes, StageID, Stage, RDDs, Task, Load) %>%
  mutate(
    ID = as.numeric(ID),
    Time = as.numeric(Time),
    StageID = as.numeric(StageID),
    RDDs = as.numeric(RDDs),
    Tasks = as.numeric(Task),
    Load = as.numeric(Load)
  ) %>%
  group_by(ID, Time, Nodes, StageID, Stage) %>%
  summarise(RDDs = mean(RDDs), Tasks = mean(Tasks), Load = mean(Load)) %>%
  ungroup() %>%
  filter(StageID != -1) %>%
  # The Nodes value parsed from the log is overwritten with one derived from
  # the application ID.
  mutate(Nodes = (ID %% 3) + 1)
head(monitor)

# Distinct (ID, Nodes) pairs. print(n = Inf) shows every row; head(n = Inf)
# would still be truncated by the default tibble printing.
apps <- monitor %>%
  select(ID, Nodes) %>%
  distinct()
print(apps, n = Inf)

# Samples of applications 0, 1 and 2, with Load exposed as the plotted y
# value and Nodes turned into a factor, ordered by Time.
d <- monitor %>%
  filter(ID %in% c(0, 1, 2)) %>%
  mutate(y = Load, Nodes = factor(Nodes)) %>%
  select(ID, Time, y, Nodes, Stage) %>%
  arrange(Time)
# First sample of every stage, used to mark stage starts on the curve.
stageStarts <- getStageStart(d)

# y over Time as a line, coloured and line-typed by Nodes, with one point per
# stage start. `text` is not a ggplot2 aesthetic (hence the warning echoed
# below); it is supplied for the ggplotly() tooltip.
p <- ggplot(
  data = d,
  mapping = aes(
    x = Time,
    y = y,
    group = 1,
    color = Nodes,
    linetype = Nodes
  )
) +
  geom_line() +
  geom_point(
    data = stageStarts,
    mapping = aes(
      x = Start,
      y = y,
      group = 1,
      color = Nodes,
      text = paste(Stage, "<br>Start:", Start, "<br>", y)
    )
  )
## Warning: Ignoring unknown aesthetics: text

# Render interactively; the tooltip shows only the `text` aesthetic.
ggplotly(p, tooltip = "text")